{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "46f32ff1",
   "metadata": {},
   "source": [
    "# Lesson 2: Tool Calling"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "fb345ad0",
   "metadata": {},
   "source": [
    "## Setup"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c5bbf530-3f05-434c-a70f-ac2cc4b8f7aa",
   "metadata": {
    "height": 46,
    "tags": []
   },
   "outputs": [],
   "source": [
    "from helper import get_openai_api_key\n",
    "OPENAI_API_KEY = get_openai_api_key()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d4c06c95-e8b2-4574-b14d-685876aa1c47",
   "metadata": {
    "height": 46,
    "tags": []
   },
   "outputs": [],
   "source": [
    "import nest_asyncio\n",
    "nest_asyncio.apply()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2e53c064",
   "metadata": {},
   "source": [
    "## 1. Define a Simple Tool"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "071b717a-93cc-4332-b357-59a693359563",
   "metadata": {
    "height": 233,
    "tags": []
   },
   "outputs": [],
   "source": [
    "from llama_index.core.tools import FunctionTool\n",
    "\n",
    "def add(x: int, y: int) -> int:\n",
    "    \"\"\"Adds two integers together.\"\"\"\n",
    "    return x + y\n",
    "\n",
    "def mystery(x: int, y: int) -> int: \n",
    "    \"\"\"Mystery function that operates on top of two numbers.\"\"\"\n",
    "    return (x + y) * (x + y)\n",
    "\n",
    "\n",
    "add_tool = FunctionTool.from_defaults(fn=add)\n",
    "mystery_tool = FunctionTool.from_defaults(fn=mystery)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d4e62118-992b-4629-9022-be8c628209c1",
   "metadata": {
    "height": 165,
    "tags": []
   },
   "outputs": [],
   "source": [
    "from llama_index.llms.openai import OpenAI\n",
    "\n",
    "llm = OpenAI(model=\"gpt-3.5-turbo\")\n",
    "response = llm.predict_and_call(\n",
    "    [add_tool, mystery_tool], \n",
    "    \"Tell me the output of the mystery function on 2 and 9\", \n",
    "    verbose=True\n",
    ")\n",
    "print(str(response))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8cb8a835",
   "metadata": {},
   "source": [
    "## 2. Define an Auto-Retrieval Tool"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6589123f",
   "metadata": {},
   "source": [
    "### Load Data"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "bcdea238",
   "metadata": {
    "tags": []
   },
   "source": [
    "To download this paper, below is the needed code:\n",
    "\n",
    "#!wget \"https://openreview.net/pdf?id=VtmBAGCN7o\" -O metagpt.pdf\n",
    "\n",
    "**Note**: The pdf file is included with this lesson. To access it, go to the `File` menu and select`Open...`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fbe9326c-d7b3-452b-ae52-12f000157be4",
   "metadata": {
    "height": 80,
    "tags": []
   },
   "outputs": [],
   "source": [
    "from llama_index.core import SimpleDirectoryReader\n",
    "# load documents\n",
    "documents = SimpleDirectoryReader(input_files=[\"metagpt.pdf\"]).load_data()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5451f0a3-d0a6-4b5c-a337-8e1a343ff5f0",
   "metadata": {
    "height": 63,
    "tags": []
   },
   "outputs": [],
   "source": [
    "from llama_index.core.node_parser import SentenceSplitter\n",
    "splitter = SentenceSplitter(chunk_size=1024)\n",
    "nodes = splitter.get_nodes_from_documents(documents)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e0fe0a9c-1f87-4ae7-a79e-7c3cf9c395ed",
   "metadata": {
    "height": 29,
    "tags": []
   },
   "outputs": [],
   "source": [
    "print(nodes[0].get_content(metadata_mode=\"all\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d7965cba-67b8-4cca-8e5f-2b0dbc96f6b0",
   "metadata": {
    "height": 80,
    "tags": []
   },
   "outputs": [],
   "source": [
    "from llama_index.core import VectorStoreIndex\n",
    "\n",
    "vector_index = VectorStoreIndex(nodes)\n",
    "query_engine = vector_index.as_query_engine(similarity_top_k=2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "560f319c-8479-40c5-9b55-480fef98deb7",
   "metadata": {
    "height": 250,
    "tags": []
   },
   "outputs": [],
   "source": [
    "from llama_index.core.vector_stores import MetadataFilters\n",
    "\n",
    "query_engine = vector_index.as_query_engine(\n",
    "    similarity_top_k=2,\n",
    "    filters=MetadataFilters.from_dicts(\n",
    "        [\n",
    "            {\"key\": \"page_label\", \"value\": \"2\"}\n",
    "        ]\n",
    "    )\n",
    ")\n",
    "\n",
    "response = query_engine.query(\n",
    "    \"What are some high-level results of MetaGPT?\", \n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2da4042f-8fdb-4959-8760-86685c903cfd",
   "metadata": {
    "height": 29,
    "tags": []
   },
   "outputs": [],
   "source": [
    "print(str(response))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "30bb264c-42e0-46f8-9d28-da11a8535960",
   "metadata": {
    "height": 46,
    "tags": []
   },
   "outputs": [],
   "source": [
    "for n in response.source_nodes:\n",
    "    print(n.metadata)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6c392482",
   "metadata": {},
   "source": [
    "### Define the Auto-Retrieval Tool"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2639e79b-f615-425b-85ea-8a279bb26dd0",
   "metadata": {
    "height": 624,
    "tags": []
   },
   "outputs": [],
   "source": [
    "from typing import List\n",
    "from llama_index.core.vector_stores import FilterCondition\n",
    "\n",
    "\n",
    "def vector_query(\n",
    "    query: str, \n",
    "    page_numbers: List[str]\n",
    ") -> str:\n",
    "    \"\"\"Perform a vector search over an index.\n",
    "    \n",
    "    query (str): the string query to be embedded.\n",
    "    page_numbers (List[str]): Filter by set of pages. Leave BLANK if we want to perform a vector search\n",
    "        over all pages. Otherwise, filter by the set of specified pages.\n",
    "    \n",
    "    \"\"\"\n",
    "\n",
    "    metadata_dicts = [\n",
    "        {\"key\": \"page_label\", \"value\": p} for p in page_numbers\n",
    "    ]\n",
    "    \n",
    "    query_engine = vector_index.as_query_engine(\n",
    "        similarity_top_k=2,\n",
    "        filters=MetadataFilters.from_dicts(\n",
    "            metadata_dicts,\n",
    "            condition=FilterCondition.OR\n",
    "        )\n",
    "    )\n",
    "    response = query_engine.query(query)\n",
    "    return response\n",
    "    \n",
    "\n",
    "vector_query_tool = FunctionTool.from_defaults(\n",
    "    name=\"vector_tool\",\n",
    "    fn=vector_query\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2a408ace-cf25-425b-8248-7028ceabcd42",
   "metadata": {
    "height": 131,
    "tags": []
   },
   "outputs": [],
   "source": [
    "llm = OpenAI(model=\"gpt-3.5-turbo\", temperature=0)\n",
    "response = llm.predict_and_call(\n",
    "    [vector_query_tool], \n",
    "    \"What are the high-level results of MetaGPT as described on page 2?\", \n",
    "    verbose=True\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6ec05565-6adf-4294-ba5c-b384220876ac",
   "metadata": {
    "height": 46,
    "tags": []
   },
   "outputs": [],
   "source": [
    "for n in response.source_nodes:\n",
    "    print(n.metadata)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "fef4dec0",
   "metadata": {},
   "source": [
    "## Let's add some other tools!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "55dd32e5-e29f-42ed-839a-ca937fe4743e",
   "metadata": {
    "height": 267,
    "tags": []
   },
   "outputs": [],
   "source": [
    "from llama_index.core import SummaryIndex\n",
    "from llama_index.core.tools import QueryEngineTool\n",
    "\n",
    "summary_index = SummaryIndex(nodes)\n",
    "summary_query_engine = summary_index.as_query_engine(\n",
    "    response_mode=\"tree_summarize\",\n",
    "    use_async=True,\n",
    ")\n",
    "summary_tool = QueryEngineTool.from_defaults(\n",
    "    name=\"summary_tool\",\n",
    "    query_engine=summary_query_engine,\n",
    "    description=(\n",
    "        \"Useful if you want to get a summary of MetaGPT\"\n",
    "    ),\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4228ca7c-42a0-494b-987b-5a1c5c584536",
   "metadata": {
    "height": 114,
    "tags": []
   },
   "outputs": [],
   "source": [
    "response = llm.predict_and_call(\n",
    "    [vector_query_tool, summary_tool], \n",
    "    \"What are the MetaGPT comparisons with ChatDev described on page 8?\", \n",
    "    verbose=True\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4aa3e8c1-a8c3-4c92-a0e4-5c081f91d966",
   "metadata": {
    "height": 46,
    "tags": []
   },
   "outputs": [],
   "source": [
    "for n in response.source_nodes:\n",
    "    print(n.metadata)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "21906d47-7266-4479-bbb4-9f392d5c399b",
   "metadata": {
    "height": 97,
    "tags": []
   },
   "outputs": [],
   "source": [
    "response = llm.predict_and_call(\n",
    "    [vector_query_tool, summary_tool], \n",
    "    \"What is a summary of the paper?\", \n",
    "    verbose=True\n",
    ")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
